import scrapy
from python100 import items
from urllib.parse import urljoin

class SpiderSpider(scrapy.Spider):
    """Scrape the "Python 100 examples" exercise pages from runoob.com.

    Crawl flow: ``parse`` reads the index page and follows the link of every
    exercise; ``parse_info`` extracts one :class:`items.Python100Item` per
    exercise page (title, analysis, source code, url).
    """
    name = 'spider'
    allowed_domains = ['www.runoob.com']
    # BUGFIX: the original URL had a stray trailing slash after ".html"
    # ('...python-100-examples.html/'), which is not the index page's path.
    start_urls = ['http://www.runoob.com/python/python-100-examples.html']

    # Collect the list of exercise links from the index page.
    def parse(self, response):
        """Yield a request (handled by ``parse_info``) for each exercise link.

        The links in the index are relative hrefs, so they are resolved
        against ``response.url`` before being requested.
        """
        urls = response.xpath('//*[@id="content"]/ul/li/a/@href').extract()
        for url in urls:
            yield scrapy.Request(urljoin(response.url, url), callback=self.parse_info)

    # Collect the details of a single exercise page.
    # Fields: title, program analysis, program source code.
    def parse_info(self, response):
        """Extract one Python100Item from an exercise detail page.

        NOTE(review): the p[2]/p[3] positional XPaths assume a fixed page
        layout (title in the 2nd paragraph, analysis in the 3rd) — verify
        against the live pages, as any missing paragraph shifts the fields.
        """
        item = items.Python100Item()
        # Exercise title
        item['title'] = response.xpath('//*[@id="content"]/p[2]/text()').extract_first()
        # Program analysis
        item['program_analysis'] = response.xpath('//*[@id="content"]/p[3]/text()').extract_first()
        # Program source code: some pages use a styled example div, older
        # ones a plain <pre>; string() flattens whichever node matches.
        item['program_code'] = response.xpath('string(//*[@id="content"]/div/div[@class="example_code"]|//*[@id="content"]/pre)').extract_first()
        # Source page URL, kept for traceability of each item.
        item['href'] = response.url
        return item
